Skip to main content

Programmatisk nedlasting av data

Nedlastingsløsningen har et HTTP/JSON-grensesnitt for de som har behov for å automatisere nedlasting av data. For manuell nedlasting finnes det informasjon her.

Grensesnittet støtter token-sikkerhet og basic authentication. Kallet under, med token, gir en JSON-respons med informasjon om de filene brukeren har tilgang til. Hvis man utelater token-parameteren, blir basic authentication benyttet.

https://nedlasting.geodataonline.no/json?token=<ditt_token>

Informasjon om hvordan man lager et token finnes her.

Responsen fra kallet er på formen under, med informasjon om oppdateringstidspunkt, størrelse, url etc.

[
{
"Name":"POST_Posten.zip",
"FileName":"POST_Posten.zip",
"BucketName":"geodata-gdonline-gdo-processing-products",
"LastModified":"2021-11-17T12:25:28+00:00",
"Size":1102875,
"Url":"https://nedlasting.geodataonline.no/download/geodata-gdonline-gdo-processing-products/POST_Posten.zip"
}
...
]

Kodeeksempel

Under er et eksempel-skript i Python som laster ned data fra Geodata Online. Skriptet viser også hvordan man pakker ut zip-filer, inkludert zip-filer med sensitive data som kan være passordbeskyttet (kryptert).

# -*- encoding: UTF-8 -*-
import os
import shutil
import time
import datetime
import codecs
import json
import argparse
import requests
import logging
import zipfile
import pyzipper # Needed to unzip encrypted zip files. Can be pip installed.

# Show INFO-level messages so the progress lines logged by this script are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("download")
# Parsed command-line arguments; assigned in main() and read by the other functions.
args = None
# Make the script's own directory the working directory, so relative paths
# (e.g. "index.json" and the default download path ".") resolve next to the script.
os.chdir(os.path.dirname(os.path.realpath(__file__)))


def main():
    """Download one named file from Geodata Online and unpack it if it is a zip.

    Parses the command line into the module-level ``args`` (the other
    functions in this script read it), requests a token, looks the file up in
    the download index and fetches it when ``should_download`` reports that
    the local copy is missing or stale. After a download the local file's
    modification time is set to the index entry's ``LastModified`` so the next
    run can compare against it.

    Zip files are extracted to ``<download-path>/unzipped`` after a fresh
    download, or when that folder does not exist yet.

    NOTE(review): the published listing of this script has lost its
    indentation, so whether extraction was originally conditional on a fresh
    download cannot be read from it; confirm the intended behaviour.

    Raises:
        ValueError: if no entry in the index matches ``--filename``.
    """
    global args
    parser = argparse.ArgumentParser(description='Downloads a file from Geodata Online.')
    parser.add_argument('--filename', required=True, help='Name of file to download')
    parser.add_argument('--username', required=True, help='Geodata Online username')
    parser.add_argument('--password', required=True, help='Geodata Online password')
    parser.add_argument('--zippassword', required=False, help='Password that zip file is encrypted with')
    parser.add_argument('--download-url',
                        help='Url to the Geodata Online download portal.',
                        default="https://nedlasting.geodataonline.no/")
    parser.add_argument('--download-path', required=False, default=".",
                        help='Local directory to download the file to.')
    args = parser.parse_args()
    logger.info("Downloading file %s from %s", args.filename, args.download_url)

    token = generate_token().get("token")
    index = download_index(token)
    # Index names may carry a folder prefix; compare on the last path segment only.
    entries = [entry for entry in index if entry["Name"].rpartition("/")[2] == args.filename]
    if not entries:
        raise ValueError("File %s not found in index of downloadable files." % args.filename)
    entry = entries[0]

    # The target directory may not exist yet; opening the output file would fail.
    os.makedirs(args.download_path, exist_ok=True)
    local_file = os.path.join(args.download_path, args.filename)

    downloaded = should_download(entry, local_file)
    if downloaded:
        download_file(entry["Url"] + "?token=" + token, local_file)
        # Stamp the file with the server-side time so later runs can detect updates.
        last_modified = get_last_modified(entry)
        os.utime(local_file, (last_modified, last_modified))

    if local_file.lower().endswith(".zip"):
        unzip_path = os.path.join(args.download_path, "unzipped")
        if downloaded or not os.path.isdir(unzip_path):
            _extract_archive(local_file, unzip_path, args.zippassword)
    logger.info("Download complete.")


def _extract_archive(zip_path, target_dir, password=None):
    """Replace ``target_dir`` with the contents of ``zip_path``, decrypting with ``password`` if given."""
    logger.info("Unzipping file %s to path %s", zip_path, target_dir)
    # Start from an empty folder so files removed from the archive do not linger.
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    if not password:
        with zipfile.ZipFile(zip_path, "r") as zf:
            zf.extractall(target_dir)
    else:
        # Password-protected archives are opened with pyzipper's AES reader.
        with pyzipper.AESZipFile(zip_path, "r") as zf:
            zf.setpassword(password.encode("utf-8"))
            zf.extractall(target_dir)


def download_index(token):
    """Fetch the JSON index of downloadable files and return it parsed.

    The index is saved as ``index.json`` in the current working directory and
    then loaded from that file.

    Args:
        token: access token appended to the index URL as ``?token=``.

    Returns:
        The decoded JSON document (``main`` iterates it as a list of entries).
    """
    # The default --download-url ends with "/"; strip it to avoid "//json".
    index_url = args.download_url.rstrip("/") + "/json?token=" + token
    download_file(index_url, "index.json")
    # Read as bytes so json detects the encoding itself instead of relying on
    # the platform's locale encoding, which is not UTF-8 on every system.
    with open("index.json", "rb") as f:
        return json.load(f)


def download_file(url, local_file, timeout=60):
    """Stream the content at ``url`` into ``local_file``.

    Args:
        url: address to fetch; may contain an access token in its query string.
        local_file: path of the file to (over)write.
        timeout: seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled server cannot block the script indefinitely.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (via ``raise_for_status``).
    """
    # The query string carries the access token; keep it out of the log.
    logger.info("Downloading url %s to file %s", url.partition("?")[0], local_file)
    headers = {
        "referer": "https://www.geodata.no"
    }
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(local_file, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024*1024):  # 1MB chunks
                f.write(chunk)
    logger.info("Done downloading file.")


def generate_token(token_service_url="https://services.geodataonline.no/arcgis/tokens/generateToken",
                   referer="https://www.geodata.no", expiration_in_minutes=60, timeout=60):
    """Request an access token for the credentials in the module-level ``args``.

    Args:
        token_service_url: endpoint that issues tokens.
        referer: referer value the token is requested for (``client=referer``).
        expiration_in_minutes: requested token lifetime.
        timeout: seconds passed to ``requests.post`` as its ``timeout``, so a
            stalled server cannot block the script indefinitely.

    Returns:
        The decoded JSON response as a dict containing at least ``token`` and
        ``expires``.

    Raises:
        ValueError: if the response is not JSON, or lacks ``token``/``expires``.
    """
    response = requests.post(token_service_url, data={
        'username': args.username,
        'password': args.password,
        'client': 'referer',
        'referer': referer,
        'expiration': expiration_in_minutes,
        'f': 'json'
    }, verify=True, timeout=timeout)
    try:
        payload = response.json()
    except ValueError as err:
        # JSON decode errors are ValueError subclasses; keep the cause attached.
        raise ValueError("Unable to parse response from url %s into json." % token_service_url) from err
    if not isinstance(payload, dict) or 'token' not in payload or 'expires' not in payload:
        raise ValueError("Unexpected response %s when generating token." % json.dumps(payload))
    return payload


def get_last_modified(entry):
    """Return the entry's ``LastModified`` timestamp as whole seconds since the Unix epoch (UTC).

    The value is expected to look like ``2021-11-17T12:25:28+00:00``. After
    the seconds, the following are accepted: optional fractional seconds
    (ignored), then nothing, ``Z``, or a UTC offset written as ``+HH:MM``,
    ``+HHMM`` or ``+HH`` (or with ``-``). Any other trailing text is treated
    as UTC.

    Args:
        entry: index entry (mapping) holding a ``LastModified`` string.

    Returns:
        int: epoch seconds of the timestamp converted to UTC.

    Raises:
        ValueError: if ``LastModified`` is missing, or the date/time or the
            offset digits cannot be parsed.
    """
    if "LastModified" not in entry:
        raise ValueError("Index entry has no LastModified property.")
    raw = entry["LastModified"]

    # The first 19 characters are the fixed-width "YYYY-MM-DDTHH:MM:SS" part.
    local_time = datetime.datetime.strptime(raw[:19], "%Y-%m-%dT%H:%M:%S")
    suffix = raw[19:]

    # Skip fractional seconds so they are not mistaken for the offset.
    if suffix[:1] in (".", ","):
        end = 1
        while end < len(suffix) and suffix[end].isdigit():
            end += 1
        suffix = suffix[end:]

    offset_minutes = 0
    if suffix[:1] in ("+", "-"):
        digits = suffix[1:].replace(":", "")
        if len(digits) not in (2, 4) or not digits.isdigit():
            raise ValueError("Unrecognised UTC offset %r in LastModified value %r." % (suffix, raw))
        offset_minutes = int(digits[:2]) * 60 + int(digits[2:] or 0)
        if suffix[0] == "-":
            offset_minutes = -offset_minutes

    # Local time minus its offset is UTC.
    utc_time = local_time - datetime.timedelta(minutes=offset_minutes)
    logger.info("Parsed time %s to datetime %s", raw, utc_time)
    return round((utc_time - datetime.datetime(1970, 1, 1)).total_seconds())


def should_download(entry, local_file):
    """Decide whether the indexed file has to be (re)downloaded.

    A download is needed when the local file does not exist, when the entry
    has no ``Size``, when the sizes differ, or when the sizes match but the
    entry's ``LastModified`` is later than the local file's modification time.

    Args:
        entry: index entry (mapping) with ``Size`` and ``LastModified``.
        local_file: path of the local copy.

    Returns:
        bool: True if the file should be downloaded, False if the local copy
        can be kept.

    Raises:
        ValueError: propagated from ``get_last_modified`` when the entry has a
            ``Size`` but no usable ``LastModified``.
    """
    if not os.path.exists(local_file):
        logger.info("Local file %s does not exist, proceeding to download.", local_file)
        return True
    if "Size" not in entry:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning("Index entry has no Size property, proceeding to download.")
        return True

    stats = os.stat(local_file)
    logger.debug("Local file %s exists with size %d bytes, last modified: %s",
                 local_file, stats.st_size, time.ctime(stats.st_mtime))
    remote_size = entry["Size"]
    remote_modified = get_last_modified(entry)

    if remote_size != stats.st_size:
        logger.info("Download has different size, downloading file.")
        return True
    if remote_modified > stats.st_mtime:
        logger.info("Download has same size but is newer, downloading file.")
        return True
    logger.info("Download has same size as local file and is not newer, skipping download.")
    return False


# Run the downloader only when this file is executed as a script,
# then print a final marker once main() has returned.
if __name__ == "__main__":
    main()
    print("Script has reached end.")